home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Cream of the Crop 22
/
Cream of the Crop 22.iso
/
program
/
cgazv3n4.zip
/
ROOT-DIR.ZIP
/
BM.C
next >
Wrap
C/C++ Source or Header
|
1989-05-31
|
6KB
|
221 lines
/***************************************************************
*
* Boyer-Moore string search routine
*
* Author: John Rex
* References: (1) Boyer RS, Moore JS: "A fast string searching
* algorithm" CACM 20(10):762-777, 1977
* (2) plus others--see text of article
*
* Compilers: Microsoft C V5.1 - compile as is
* Turbo C V2.0 - compile as is
*
* Compile time preprocessor switches:
* DEBUG - if defined, include test driver
*
* Usage:
*
* char *pattern, *text; - search for pattern in text
* unsigned length; - length of text (the routine does
* NOT stop for '\0' bytes, thus
* allowing it to search strings
* stored sequentially in memory.
* char *start; - pointer to match
*
* char *Boyer_Moore(char *, char *, unsigned);
*
* start = Boyer_Moore(pattern, text, strlen(text);
*
* NULL is returned if the search fails.
*
* Switches: if defined:
*
* DEBUG will cause the search routine to dump its tables
* at various times--this is useful when trying to
* understand how upMatchJump is generated
*
* DRIVER will cause a test drive to be compiled
*
* Source code may be used freely if source is acknowledged.
* Object code may be used freely.
**************************************************************/
#define DRIVER
#if defined(DEBUG)
#define SHOWCHAR for (uT=1; uT<=uPatLen; uT++) \
printf(" %c ", pcPattern[uT-1])
#define SHOWJUMP for (uT=1;uT<=uPatLen;uT++) \
printf("%2d ", upMatchJump[uT])
#define SHOWA printf(" uA = %u ", uA)
#define SHOWB printf(" uB = %u", uB)
#define SHOWBACK for (uT=1;uT<=uPatLen;uT++) \
printf("%2d ", upBackUp[uT])
#define NL printf("\n")
unsigned uT;
#else
#define SHOWCHAR
#define SHOWJUMP
#define SHOWA
#define SHOWB
#define SHOWBACK
#define NL
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define AlphabetSize 256
char *Boyer_Moore(pcPattern, pcText, uTextLen)
char *pcPattern; /* we search for this ... */
char *pcText; /* ... in this text ... */
unsigned uTextLen; /* ... up to this length */
{
/* array of character mis-match offsets */
unsigned uCharJump[AlphabetSize];
/* array of offsets for partial matches */
unsigned *upMatchJump;
/* temporary array for upMatchJump calc */
unsigned *upBackUp;
unsigned u, uPatLen;
unsigned uText, uPat, uA, uB;
/* Setup and initialize arrays */
uPatLen = strlen(pcPattern);
upMatchJump = (unsigned *)
malloc(2 * (sizeof(unsigned) * (uPatLen + 1)) );
upBackUp = upMatchJump + uPatLen + 1;
/* Heuristic #1 -- simple char mis-match jumps ... */
memset(uCharJump, 0, AlphabetSize*sizeof(unsigned));
for (u = 0 ; u < uPatLen; u++)
uCharJump[((unsigned char) pcPattern[u])]
= uPatLen - u - 1;
/* Heuristic #2 -- offsets from partial matches ... */
for (u = 1; u <= uPatLen; u++)
upMatchJump[u] = 2 * uPatLen - u;
/* largest possible jump */
SHOWCHAR; NL;
SHOWJUMP; NL;
u = uPatLen;
uA = uPatLen + 1;
while (u > 0) {
upBackUp[u] = uA;
while( uA <= uPatLen &&
pcPattern[u - 1] != pcPattern[uA - 1]) {
if (upMatchJump[uA] > uPatLen - u)
upMatchJump[uA] = uPatLen - u;
uA = upBackUp[uA];
}
u--;
uA--;
}
SHOWJUMP; SHOWA; SHOWBACK; NL;
for (u = 1; u <= uA; u++)
if (upMatchJump[u] > uPatLen + uA - u)
upMatchJump[u] = uPatLen + uA - u;
uB = upBackUp[uA];
SHOWJUMP; SHOWB; NL;
while (uA <= uPatLen) {
while (uA <= uB) {
if (upMatchJump[uA] > uB - uA + uPatLen)
upMatchJump[uA] = uB - uA + uPatLen;
uA++;
}
uB = upBackUp[uB];
}
SHOWJUMP; NL;
/* now search */
uPat = uPatLen; /* tracks position in Pattern */
uText = uPatLen - 1; /* tracks position in Text */
while (uText < uTextLen && uPat != 0) {
if (pcText[uText] == pcPattern[uPat - 1]) { /* match? */
uText--; /* back up to next */
uPat--;
}
else { /* a mismatch - slide pattern forward */
uA = uCharJump[((unsigned char) pcText[uText])];
uB = upMatchJump[uPat];
uText += max(uA, uB); /* select larger jump */
uPat = uPatLen;
}
}
/* return our findings */
if (uPat == 0)
return(pcText + (uText + 1)); /* have a match */
else
return (NULL); /* no match */
}
#pragma subtitle("Test Driver")
#ifdef DRIVER
#define PATTERN argv[1]
#define TEXT_SIZE 32000u
void main(argc, argv)
int argc;
char **argv;
{
char *start, *p;
char text[TEXT_SIZE];
int i;
unsigned length, count;
FILE *infile;
if (argc != 3) {
puts("Usage is: bm pattern file\n");
exit(0);
}
if ((infile = fopen(argv[2], "r")) == NULL) {
puts("Can't open input file\n");
exit(0);
}
/* read in whole file */
length = fread(text, 1, TEXT_SIZE, infile);
fclose(infile);
p=text;
count=0;
while (count < length) {
if (*p == '\n')
*p = '\0';
p++;
count++;
}
/* now search repeatedly */
start = Boyer_Moore(PATTERN, text, length);
while (start != NULL) {
for (p = start; ; p--) { /* find start of line */
if (*p == '\0') {
p++;
break;
}
else if (p == text)
break;
}
printf("Match:\n%s\n", p);
for (i=start-p; i>0; i--)
fputc(' ', stdout);
printf("%s\n\n", PATTERN);
start = Boyer_Moore(PATTERN, start+1,
length - (start-text) - 1);
}
}
#endif